#!/usr/bin/env bash
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2023 Intel Corporation
#  All rights reserved.

# Do a similar thing to what turbostat.c does, but in a more compact
# scope. We just report the temperature as per coretemp's hwmon entries
# for each core|package and check if there is any indication that
# throttling took place (per cpu thread).
# Abort on the first unhandled command failure.
set -e
# extglob is required by the temp+([0-9])_label pattern used in report_hwmon_coretemp()
# and must be enabled before that function is parsed. nullglob makes unmatched globs
# expand to nothing, so the hwmon loops simply do not iterate when nothing matches.
shopt -s extglob nullglob

# Resolve the directory this script lives in and the directory three levels above it,
# then pull in the helper libraries. Helpers used below but not defined in this file
# (e.g. get_cpus, map_cpus, save_pm_pid, rm_pm_pid, retag) presumably come from these
# two sourced files - verify there.
pmdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$pmdir/../../../")
source "$rootdir/test/scheduler/common.sh"
source "$pmdir/common"

# Convert a millidegree reading to whole degrees (integer division by 1000).
# Arguments: $1 - temperature in millidegrees
# Outputs:   the converted value, followed by a newline, on stdout
to_celsius() {
	local millidegrees=$1

	printf '%s\n' "$((${millidegrees} / 1000))"
}

# Walk every /sys/class/hwmon/hwmon* entry and hand it to the matching reporter:
# "coretemp" devices go to report_hwmon_coretemp(), everything else goes to
# report_hwmon_generic() with its stderr silenced. Entries without a "name"
# attribute are skipped and a failing reporter does not stop the walk.
report_hwmon() {
	local hwmon hw_name

	for hwmon in /sys/class/hwmon/hwmon*; do
		if [[ ! -e $hwmon/name ]]; then
			continue
		fi

		hw_name=$(< "$hwmon/name")
		if [[ $hw_name == coretemp ]]; then
			report_hwmon_coretemp "$hwmon" || continue
		else
			report_hwmon_generic "$hwmon" 2> /dev/null || continue
		fi
	done
}

# Report the first temperature input (temp1) of a generic, i.e. non-coretemp, hwmon device.
#
# Arguments: $1 - path to the hwmon sysfs directory
# Globals:   TEST_TAG (read)
# Outputs:   "(<ts>) --- <name>[:<temp1_label>][:<device>] (<temp> C) (test:<tag>)" followed
#            by a "---" separator line, both on stdout
# Returns:   0, also when the device is skipped because temp1_input is missing, unreadable
#            or not a number
report_hwmon_generic() {
	local hwmon=$1 temp ts
	local label dev sensor_label

	# We report just the bare minimum as each device may come with
	# different set of labels, inputs, etc.
	[[ -e $hwmon/temp1_input ]] || return 0

	# These entries, especially the ones dedicated for nvme, may disappear during
	# tests, so try to handle them gracefully.
	if ! temp=$(< "$hwmon/temp1_input") || ! label=$(< "$hwmon/name"); then
		return 0
	fi

	# A read racing with a vanishing device may leave us with nothing usable - skip
	# the entry instead of feeding garbage to the arithmetic below.
	[[ $temp =~ ^-?[0-9]+$ ]] || return 0

	# Append the optional bits only when they were actually read, so a label never
	# ends up with a dangling ":".
	if [[ -e $hwmon/temp1_label ]] && sensor_label=$(< "$hwmon/temp1_label"); then
		label+=":$sensor_label"
	fi

	if [[ -e $hwmon/device ]] && dev=$(readlink -f "$hwmon/device"); then
		label+=":${dev##*/}"
	fi

	# Take the timestamp only once we know there is something to report.
	ts=$(dater)

	# hwmon inputs are signed millidegrees, hence %d - with %u a sub-zero reading
	# would be printed as a huge unsigned number.
	printf '(%s) --- %s (%d C) (test:%s)\n' \
		"$ts" \
		"$label" \
		"$(to_celsius "$temp")" \
		"$TEST_TAG"
	printf '%s\n' "---"
}

# Report the package temperature and per-core temperatures of a coretemp hwmon device.
#
# Arguments: $1 - path to the coretemp hwmon sysfs directory
# Globals:   TEST_TAG (read)
# Outputs:   a "Node<N>" header line, one line per core (with the cpu threads returned by
#            get_cpus, current/crit/max temperature) and a closing "---" line, on stdout
# Returns:   1 when no "Package" label was found among the temp*_label entries
report_hwmon_coretemp() {
	local hwmon=$1 temp ts
	local core crit input id label max node package threads
	local cores_input=() cores_crit=() cores_max=()
	local label_file

	ts=$(dater)

	for label_file in "$hwmon/"temp+([0-9])_label; do
		# Each tempN_label has sibling tempN_input, tempN_crit and tempN_max entries.
		temp=${label_file%_label}
		label=$(< "$label_file")
		input=$(< "${temp}_input")
		crit=$(< "${temp}_crit")
		max=$(< "${temp}_max")
		# The id is whatever follows the last space of the label.
		id=${label##* }

		if [[ $label == Core* ]]; then
			cores_input[id]=$input
			cores_crit[id]=$crit
			cores_max[id]=$max
		elif [[ $label == Package* ]]; then
			node=$id
			package[node]=$input
		fi
	done

	# Without a package id there is nothing to anchor the report to - something is amiss.
	# FIXME: This is a cheeky assumption that each physical package (socket) N maps to
	# a corresponding numa node N. For physical systems this may be usually the case
	# but for quirky vNUMA setups not necessarily. For similar systems (don't have
	# any at hand and it's a bit tricky to test thermal stuff under VMs) this probably
	# would need to drop the lookup of the "Package" label and just check each thread's
	# physical_package_id and/or numa assignment (cpu_node_map[@]).
	if [[ -z $node ]]; then
		return 1
	fi

	printf '(%s) --- Node%u (%u C) (test:%s)\n' \
		"$ts" "$node" "$(to_celsius "${package[node]}")" "$TEST_TAG"

	for core in "${!cores_input[@]}"; do
		# Word splitting is intended here: one array element per cpu thread.
		threads=($(get_cpus "$node" "$core"))
		printf '  (%s) Core%u (%s): %u C (crit: %u C, max: %u C)\n' \
			"$ts" "$core" "${threads[*]}" \
			"$(to_celsius "${cores_input[core]}")" \
			"$(to_celsius "${cores_crit[core]}")" \
			"$(to_celsius "${cores_max[core]}")"
	done

	printf '%s\n' "---"
}

# Report thermal throttling per node, based on each cpu's thermal_throttle sysfs counters.
#
# Globals:   sysfs_cpu, nodes, debug (all read)
# Outputs:   for every node which saw throttling (or for every node when debug is 1) a status
#            header, followed by one line per cpu whose throttle count is above 0
#            (count and total time in seconds), on stdout
# Returns:   1 when cpu0 exposes no thermal_throttle/core_throttle_count entry
report_throttling() {
	# Quick support probe - if this entry is missing for cpu0 then there is no point in
	# walking the entire topology.
	if [[ ! -e $sysfs_cpu/cpu0/thermal_throttle/core_throttle_count ]]; then
		return 1
	fi

	local cpu cpu_throttling=() cpu_throttling_time=() throttler=()
	local throttle_count throttle_time
	local node node_cpus=() node_throttled
	local ts

	# Indexed by the 0|1 "throttled" flag.
	throttler=("Normal Operation" "Throttled")
	ts=$(dater)

	# Order the output similarly to hwmon, starting with a node|package
	for node in "${nodes[@]}"; do
		node_cpus=($(get_cpus "$node"))
		node_throttled=0
		cpu_throttling=()

		for cpu in "${node_cpus[@]}"; do
			throttle_count=$(< "$sysfs_cpu/cpu$cpu/thermal_throttle/core_throttle_count")
			throttle_time=$(< "$sysfs_cpu/cpu$cpu/thermal_throttle/core_throttle_total_time_ms")
			cpu_throttling[cpu]=$throttle_count
			# Kept in seconds for the report.
			cpu_throttling_time[cpu]=$((throttle_time / 1000))
			((throttle_count <= 0 && throttle_time <= 0)) || node_throttled=1
		done

		# Quiet nodes are reported only in debug mode.
		if ((node_throttled != 1 && debug != 1)); then
			continue
		fi

		printf '(%s) ###### Throttling Node%u Status: %s ######\n' \
			"$ts" "$node" "${throttler[node_throttled]}"

		for cpu in "${!cpu_throttling[@]}"; do
			if ((cpu_throttling[cpu] <= 0)); then
				continue
			fi
			# Past the guard above the status index always resolves to 1.
			printf '(%s) CPU%u: %s (count: %u, time: %us)\n' \
				"$ts" "$cpu" \
				"${throttler[cpu_throttling[cpu] > 0 ? 1 : 0]}" \
				"${cpu_throttling[cpu]}" \
				"${cpu_throttling_time[cpu]}"
		done
	done
}

# Set up the global module tables and select which modules are going to be executed.
#
# Arguments: names of the modules to run; all supported modules are used when none is given
# Globals:   modules_supported (module name -> reporting function),
#            modules_to_run (reporting functions, in the requested order),
#            modules_out_refs (reporting function -> name of the array collecting its output);
#            all three are (re)created by every call
# Returns:   1, with a message on stderr, on the first unsupported module name
init_modules() {
	declare -gA modules_supported=()
	declare -gA modules_out_refs=()
	declare -ga modules_to_run=()
	local module handler

	modules_supported["hwmon"]=report_hwmon
	modules_supported["throttle"]=report_throttling

	for module in "${@:-"${!modules_supported[@]}"}"; do
		handler=${modules_supported["$module"]}
		if [[ -z $handler ]]; then
			printf 'Module (%s) not supported\n' "$module" >&2
			return 1
		fi
		modules_to_run+=("$handler")
		# Output of each reporting function lands in an array named after it, with a "_" prefix.
		modules_out_refs["$handler"]="_$handler"
	done
}

# Main sampling loop: run every selected module, keep its non-empty output in the module's
# collecting array and echo it to stdout, then sleep for the configured interval.
#
# Globals:   count (read; <= 0 means loop forever, otherwise the number of rounds),
#            interval (read; seconds handed to sleep),
#            modules_to_run, modules_out_refs (read)
collect_readings() {
	local remaining=$count module data

	map_cpus

	while ((count <= 0 || remaining-- > 0)); do
		for module in "${modules_to_run[@]}"; do
			# Point the nameref at the array dedicated to this module's output.
			local -n sink=${modules_out_refs["$module"]}
			data=$("$module")
			if [[ -n $data ]]; then
				sink+=("$data") && echo "$data"
			fi
		done
		sleep "${interval}s"
	done
}

# Print the current time as "HH:MM:SS <timezone>"; used as the timestamp of every reading.
dater() {
	local fmt="+%R:%S %Z"

	date "$fmt"
}

# Dump everything collected by each module into $PM_OUTPUTDIR/[<prefix>_]<module>.pm.txt
# (modules which collected nothing are skipped), then call rm_pm_pid.
#
# Globals:   modules_out_refs, PM_OUTPUTDIR, prefix (all read)
# Outputs:   a "Dumped ..." line on stdout for every file written
cleanup() {
	local module out_file

	for module in "${!modules_out_refs[@]}"; do
		local -n _ref=${modules_out_refs["$module"]}
		if ((${#_ref[@]} == 0)); then
			continue
		fi

		out_file=$PM_OUTPUTDIR/${prefix:+${prefix}_}$module.pm.txt
		printf '%s\n' "${_ref[@]}" > "$out_file"
		printf 'Dumped %s module to %s\n' "$module" "$out_file"
	done

	rm_pm_pid
}

# Print the usage message on stdout. The here-document strips leading tabs (<<-), so
# the text must stay indented with tabs only.
help() {
	cat <<- HELP

		Usage: $0 [-h] [-c count] [-d dir] [-l] [-p prefix] [-t interval] [-v] [module0 module1 ...]

		-h - Print this message.
		-c - Execute module count times. 0 is the default and it means to run
		     indefinitely.
		-d - Directory where the results should be saved. Default is /tmp.
		-l - Save output of the script to a log file (dir/${0##*/}.pm.log).
		-p - Add prefix to saved files.
		-t - How long to wait before executing modules. Default is 1s.
		-v - Be verbose. Report throttling status of every node, even if no
		     throttling was detected.

		module - Module to execute. Currently supported: 'hwmon', 'throttle'. All modules are
		         executed by default.

		When started, ${0##*/} will enter loop to continuously execute specified
		modules. Each execution will be logged to stdout. Upon termination, all
		output will be dumped to /tmp or directory set by -d.

	HELP
}

# Defaults for the command line options. count=0 makes collect_readings() loop
# indefinitely, debug=1 makes report_throttling() print nodes which did not throttle.
count=0
debug=0
interval=1
log_to_file=no
prefix=""

while getopts c:d:hlp:t:v opt; do
	case "$opt" in
		c) count=$OPTARG ;;
		# NOTE(review): PM_OUTPUTDIR gets no default in this file - presumably one of the
		# sourced files provides it; verify.
		d) PM_OUTPUTDIR=$OPTARG ;;
		h)
			help
			exit 0
			;;
		l) log_to_file=yes ;;
		p) prefix=$OPTARG ;;
		t) interval=$OPTARG ;;
		v) debug=1 ;;
		# Unrecognized options are not treated as fatal by this script.
		*) ;;
	esac
done
# Whatever is left after the options is the list of modules to run.
shift $((OPTIND - 1))

# Log file name: [<prefix>_]<script name>.pm.log
declare -r log_file=${prefix:+${prefix}_}${0##*/}.pm.log

mkdir -p "$PM_OUTPUTDIR"
if [[ $log_to_file == yes ]]; then
	# Announce the redirection on the original stderr, then send both stdout and
	# stderr of the rest of the run to the log file.
	printf 'Redirecting to %s\n' "$PM_OUTPUTDIR/$log_file" >&2
	exec > "$PM_OUTPUTDIR/$log_file" 2>&1
fi

# save_pm_pid and retag are not defined in this file - presumably they come from the
# sourced helpers. cleanup() dumps the collected readings on any exit path.
save_pm_pid
trap 'cleanup' EXIT
trap 'retag' USR1

# With set -e in effect an unsupported module name aborts the script right here.
init_modules "$@"

collect_readings
